###############################################################
# Paper: Coordination with and without incentives
# Author: Diego Andrés Jorrat
# Co-authors: Pablo Brañas-Garza, Diego Jorrat, Pablo Lomas
# Date: 17 March 2026
#
# This R script is prepared for submission purposes.
# It contains all steps required to reproduce the
# tables and figures reported in the paper.
#
# Contents:
# - Table 1: Participants by study, academic level and gender
# - Figure 1: Study 1: Outcomes by incentive scheme
# - Figure 2: Study 2: Outcomes by incentive scheme
# - Figure A1: Study 2: Outcomes by incentive scheme and game order
###############################################################

# ============================================================
# 0. PACKAGES
# ============================================================

library(haven)
library(dplyr)
library(tidyr)
library(openxlsx)
library(ggplot2)
library(tibble)
library(scales)
library(grid)

# ============================================================
# 1. LOAD DATA
# ============================================================

setwd("/Users/pablodelomas/Desktop/")
df <- read_dta("CoordintationWithAndWithoutIncentives_basedata.dta")

# ============================================================
# 2. VARIABLE RENAMING
# ============================================================

df <- df %>%
  rename(
    incentive      = st_incentive,
    p              = st_buttons_game1,
    payd           = st_buttons_maxp,
    r              = st_buttons_game2,
    riskd          = st_buttons_minr,
    crt            = crttotal,
    tom            = tom_total,
    study1         = studio3,
    study2         = studio4,
    educationlevel = ciclo_new
  )

# ============================================================
# 3. CONSTRUCTION OF KEY VARIABLES
# ============================================================

df <- df %>%
  mutate(
    hypothetical = case_when(
      incentive == 0 ~ 1,
      incentive == 1 ~ 0,
      TRUE ~ NA_real_
    ),
    datedone2 = as.numeric(as.factor(datedone))
  )

# ============================================================
# 4. VARIABLE LABELS
# ============================================================

attr(df$hypothetical, "label")   <- "hypothetical (H)"
attr(df$p, "label")              <- "P"
attr(df$payd, "label")           <- "PayD"
attr(df$r, "label")              <- "R"
attr(df$riskd, "label")          <- "RiskD"
attr(df$age, "label")            <- "age"
attr(df$crt, "label")            <- "CRT"
attr(df$tom, "label")            <- "ToM"
attr(df$female, "label")         <- "female"
attr(df$game1first, "label")     <- "game1first (G1)"
attr(df$study1, "label")         <- "study1"
attr(df$study2, "label")         <- "study2"
attr(df$educationlevel, "label") <- "education level"

# ============================================================
# 5. SAMPLE FILTER
# ============================================================

df <- df %>%
  filter(!is.na(study1) | !is.na(study2))

# ============================================================
# 6. TABLE 1: PARTICIPANTS BY STUDY, ACADEMIC LEVEL AND GENDER
# ============================================================

make_study_table <- function(data_study) {
  
  data_study <- data_study %>%
    filter(educationlevel %in% c(4, 5))
  
  counts <- data_study %>%
    count(educationlevel, gender2, name = "n") %>%
    complete(
      educationlevel = c(4, 5),
      gender2 = c(0, 1, 2),
      fill = list(n = 0)
    )
  
  total_hs  <- sum(counts$n[counts$educationlevel == 4])
  total_uni <- sum(counts$n[counts$educationlevel == 5])
  total_all <- total_hs + total_uni
  
  get_n <- function(level, g) {
    counts %>%
      filter(educationlevel == level, gender2 == g) %>%
      pull(n)
  }
  
  hs_m <- get_n(4, 0)
  hs_f <- get_n(4, 1)
  hs_o <- get_n(4, 2)
  
  un_m <- get_n(5, 0)
  un_f <- get_n(5, 1)
  un_o <- get_n(5, 2)
  
  pct_f <- function(female_n, total_n) {
    if (is.na(total_n) || total_n == 0) return("-")
    sprintf("%.2f", 100 * female_n / total_n)
  }
  
  pct_hs  <- pct_f(hs_f, total_hs)
  pct_uni <- pct_f(un_f, total_uni)
  pct_all <- pct_f(hs_f + un_f, total_all)
  
  tibble::tibble(
    Row = c("Male", "Female", "Other", "% Female", "Total"),
    `High School` = c(hs_m, hs_f, hs_o, pct_hs, total_hs),
    University    = c(un_m, un_f, un_o, pct_uni, total_uni),
    All           = c(hs_m + un_m, hs_f + un_f, hs_o + un_o, pct_all, total_all)
  )
}

tab1 <- df %>%
  filter(study1 == 1) %>%
  make_study_table()

tab2 <- df %>%
  filter(study2 == 1) %>%
  make_study_table()

blank_row <- tibble::tibble(
  Row = "",
  `High School` = "",
  University = "",
  All = ""
)

Table1 <- bind_rows(
  tibble::tibble(Row = "Study 1", `High School` = "", University = "", All = ""),
  tab1,
  blank_row,
  tibble::tibble(Row = "Study 2", `High School` = "", University = "", All = ""),
  tab2
)

out_path_table1 <- "/Users/pablodelomas/Desktop/Table1.xlsx"

wb <- createWorkbook()
addWorksheet(wb, "Table 1")

writeData(wb, sheet = "Table 1", x = Table1, startRow = 1, startCol = 1)

style_bold <- createStyle(textDecoration = "bold")

study_rows <- which(Table1$Row %in% c("Study 1", "Study 2"))
addStyle(
  wb, "Table 1",
  style = style_bold,
  rows = study_rows + 1,
  cols = 1:4,
  gridExpand = TRUE,
  stack = TRUE
)

addStyle(
  wb, "Table 1",
  style = style_bold,
  rows = 1,
  cols = 1:4,
  gridExpand = TRUE
)

setColWidths(wb, "Table 1", cols = 1:4, widths = "auto")

saveWorkbook(wb, out_path_table1, overwrite = TRUE)

message("Excel file exported to: ", out_path_table1)

# ============================================================
# 7. FIGURE 1: STUDY 1 OUTCOMES BY INCENTIVE SCHEME
# ============================================================

# We restrict the sample to Study 1 participants.
data_fig1 <- df %>%
  filter(study1 == 1)

# We compute means and standard errors for the four main outcomes
# by incentive treatment.
summary_fig1 <- data_fig1 %>%
  group_by(incentive) %>%
  summarize(
    p_mean     = mean(p, na.rm = TRUE),
    payd_mean  = mean(payd, na.rm = TRUE),
    r_mean     = mean(r, na.rm = TRUE),
    riskd_mean = mean(riskd, na.rm = TRUE),
    p_se       = sd(p, na.rm = TRUE) / sqrt(sum(!is.na(p))),
    payd_se    = sd(payd, na.rm = TRUE) / sqrt(sum(!is.na(payd))),
    r_se       = sd(r, na.rm = TRUE) / sqrt(sum(!is.na(r))),
    riskd_se   = sd(riskd, na.rm = TRUE) / sqrt(sum(!is.na(riskd))),
    .groups = "drop"
  ) %>%
  pivot_longer(
    cols = c(p_mean, payd_mean, r_mean, riskd_mean,
             p_se, payd_se, r_se, riskd_se),
    names_to = c("variable", ".value"),
    names_pattern = "(p|payd|r|riskd)_(mean|se)"
  )

# Rename variables for display in the figure.
summary_fig1$variable <- recode(
  summary_fig1$variable,
  "p"     = "P",
  "payd"  = "PayD",
  "r"     = "R",
  "riskd" = "RiskD"
)

summary_fig1$variable <- factor(
  summary_fig1$variable,
  levels = c("P", "PayD", "R", "RiskD")
)

# Convert means and standard errors into percentages.
summary_fig1 <- summary_fig1 %>%
  mutate(
    mean = mean * 100,
    se   = se * 100
  )

# Factor for legend order: Market first, Hypothetical second in data coding,
# but labels shown as M and H.
summary_fig1$incentive <- factor(summary_fig1$incentive, levels = c(1, 0))

# ------------------------------------------------------------
# P-values: difference in proportions across incentive schemes
# ------------------------------------------------------------

calc_prop_pval <- function(data, var) {
  tmp <- data %>%
    filter(!is.na(.data[[var]]), !is.na(incentive))
  
  tbl <- table(tmp[[var]], tmp$incentive)
  
  if (all(dim(tbl) == c(2, 2))) {
    prop.test(tbl)$p.value
  } else {
    NA_real_
  }
}

pval_fig1 <- tibble(
  variable = c("P", "PayD", "R", "RiskD"),
  pval = c(
    calc_prop_pval(data_fig1, "p"),
    calc_prop_pval(data_fig1, "payd"),
    calc_prop_pval(data_fig1, "r"),
    calc_prop_pval(data_fig1, "riskd")
  )
) %>%
  mutate(
    signif_label = case_when(
      is.na(pval) ~ "",
      pval < 0.01 ~ "***",
      pval < 0.05 ~ "**",
      pval < 0.10 ~ "*",
      TRUE ~ ""
    ),
    label_text = ifelse(
      !is.na(pval),
      paste0(signif_label, " p = ", formatC(pval, format = "f", digits = 3)),
      ""
    )
  )

# ------------------------------------------------------------
# Reference thresholds by outcome
# ------------------------------------------------------------

thr_fig1 <- tibble(
  variable = factor(
    c("P", "PayD", "R", "RiskD"),
    levels = c("P", "PayD", "R", "RiskD")
  ),
  y = c(50, 12.5, 50, 12.5)
)

# ------------------------------------------------------------
# Build figure
# ------------------------------------------------------------

Figure1 <- ggplot(summary_fig1, aes(x = variable, y = mean, fill = factor(incentive))) +
  geom_bar(
    stat = "identity",
    position = position_dodge(width = 0.7),
    width = 0.6,
    colour = "black"
  ) +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se),
    position = position_dodge(width = 0.7),
    width = 0.2
  ) +
  geom_segment(
    data = thr_fig1,
    aes(
      x    = as.numeric(variable) - 0.35,
      xend = as.numeric(variable) + 0.35,
      y    = y,
      yend = y
    ),
    inherit.aes = FALSE,
    colour = "red",
    linetype = "dashed",
    linewidth = 1
  ) +
  scale_fill_manual(
    values = c("0" = "grey70", "1" = "white"),
    labels = c("0" = "H", "1" = "M"),
    name = NULL
  ) +
  scale_y_continuous(
    labels = scales::percent_format(scale = 1),
    limits = c(0, 100),
    breaks = seq(0, 100, by = 10)
  ) +
  labs(
    title = "",
    subtitle = "",
    x = "",
    y = "Percentage of subjects"
  ) +
  guides(fill = guide_legend(override.aes = list(colour = NA))) +
  theme_bw() +
  theme(
    text = element_text(size = 12),
    legend.position = c(0.98, 0.98),
    legend.justification = c("right", "top"),
    legend.background = element_blank(),
    legend.box.background = element_blank(),
    legend.key = element_rect(fill = "white", colour = "black"),
    legend.key.height = unit(0.4, "lines"),
    legend.spacing.y = unit(0.1, "lines"),
    legend.text = element_text(size = 9, face = "italic"),
    legend.title = element_blank(),
    plot.title = element_text(size = 12, face = "italic"),
    plot.subtitle = element_text(size = 10, face = "italic", hjust = 0.5),
    axis.title.y = element_text(size = 10)
  ) +
  geom_text(
    data = summary_fig1 %>%
      group_by(variable) %>%
      summarize(y_pos = max(mean + se, na.rm = TRUE) + 3, .groups = "drop") %>%
      left_join(pval_fig1, by = "variable"),
    aes(x = variable, y = y_pos, label = label_text),
    inherit.aes = FALSE,
    size = 3,
    vjust = 0
  )

# Display figure in RStudio
Figure1

# Export Figure 1 as PNG (500 x 400 px)
ggsave(
  filename = "/Users/pablodelomas/Desktop/Figure1.png",
  plot = Figure1,
  width = 500 / 72,
  height = 400 / 72,
  dpi = 72
)

message("Figure exported to: /Users/pablodelomas/Desktop/Figure1.png")




# ============================================================
# 8. FIGURE 2: STUDY 2 OUTCOMES BY INCENTIVE SCHEME
# ============================================================

# We restrict the sample to Study 2 participants.
data_fig2 <- df %>%
  filter(study2 == 1)

# We compute means and standard errors for the four main outcomes
# by incentive treatment.
summary_fig2 <- data_fig2 %>%
  group_by(incentive) %>%
  summarize(
    p_mean     = mean(p, na.rm = TRUE),
    payd_mean  = mean(payd, na.rm = TRUE),
    r_mean     = mean(r, na.rm = TRUE),
    riskd_mean = mean(riskd, na.rm = TRUE),
    p_se       = sd(p, na.rm = TRUE) / sqrt(sum(!is.na(p))),
    payd_se    = sd(payd, na.rm = TRUE) / sqrt(sum(!is.na(payd))),
    r_se       = sd(r, na.rm = TRUE) / sqrt(sum(!is.na(r))),
    riskd_se   = sd(riskd, na.rm = TRUE) / sqrt(sum(!is.na(riskd))),
    .groups = "drop"
  ) %>%
  pivot_longer(
    cols = c(p_mean, payd_mean, r_mean, riskd_mean,
             p_se, payd_se, r_se, riskd_se),
    names_to = c("variable", ".value"),
    names_pattern = "(p|payd|r|riskd)_(mean|se)"
  )

# Rename variables for display in the figure.
summary_fig2$variable <- recode(
  summary_fig2$variable,
  "p"     = "P",
  "payd"  = "PayD",
  "r"     = "R",
  "riskd" = "RiskD"
)

summary_fig2$variable <- factor(
  summary_fig2$variable,
  levels = c("P", "PayD", "R", "RiskD")
)

# Convert means and standard errors into percentages.
summary_fig2 <- summary_fig2 %>%
  mutate(
    mean = mean * 100,
    se   = se * 100
  )

# We order the legend so that Market (M) appears first and
# Hypothetical (H) appears second.
summary_fig2$incentive <- factor(summary_fig2$incentive, levels = c(1, 0))

# ------------------------------------------------------------
# P-values: difference in proportions across incentive schemes
# ------------------------------------------------------------

pval_fig2 <- tibble(
  variable = c("P", "PayD", "R", "RiskD"),
  pval = c(
    calc_prop_pval(data_fig2, "p"),
    calc_prop_pval(data_fig2, "payd"),
    calc_prop_pval(data_fig2, "r"),
    calc_prop_pval(data_fig2, "riskd")
  )
) %>%
  mutate(
    signif_label = case_when(
      is.na(pval) ~ "",
      pval < 0.01 ~ "***",
      pval < 0.05 ~ "**",
      pval < 0.10 ~ "*",
      TRUE ~ ""
    ),
    label_text = ifelse(
      !is.na(pval),
      paste0(signif_label, " p = ", formatC(pval, format = "f", digits = 3)),
      ""
    )
  )

# ------------------------------------------------------------
# Reference thresholds by outcome
# ------------------------------------------------------------

thr_fig2 <- tibble(
  variable = factor(
    c("P", "PayD", "R", "RiskD"),
    levels = c("P", "PayD", "R", "RiskD")
  ),
  y = c(50, 12.5, 50, 12.5)
)

# ------------------------------------------------------------
# Build figure
# ------------------------------------------------------------

Figure2 <- ggplot(
  summary_fig2,
  aes(x = variable, y = mean, fill = factor(incentive, levels = c(1, 0)))
) +
  geom_bar(
    stat = "identity",
    position = position_dodge(width = 0.7),
    width = 0.6,
    colour = "black"
  ) +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se),
    position = position_dodge(width = 0.7),
    width = 0.2
  ) +
  geom_segment(
    data = thr_fig2,
    aes(
      x    = as.numeric(variable) - 0.35,
      xend = as.numeric(variable) + 0.35,
      y    = y,
      yend = y
    ),
    inherit.aes = FALSE,
    colour = "red",
    linetype = "dashed",
    linewidth = 1
  ) +
  scale_fill_manual(
    values = c("1" = "white", "0" = "grey70"),
    labels = c("1" = "M", "0" = "H"),
    name   = NULL
  ) +
  scale_y_continuous(
    labels = scales::percent_format(scale = 1),
    limits = c(0, 100),
    breaks = seq(0, 100, by = 10)
  ) +
  labs(
    title = "",
    subtitle = "",
    x = "",
    y = "Percentage of subjects"
  ) +
  theme_bw() +
  theme(
    text = element_text(size = 12),
    legend.position = c(0.98, 0.98),
    legend.justification = c("right", "top"),
    legend.background = element_blank(),
    legend.box.background = element_blank(),
    legend.key = element_rect(fill = "white", colour = "black", linewidth = 0.5),
    legend.key.height = unit(0.4, "lines"),
    legend.spacing.y = unit(0.1, "lines"),
    legend.text = element_text(face = "italic"),
    legend.title = element_blank(),
    plot.title = element_text(size = 12, face = "italic"),
    plot.subtitle = element_text(size = 10, face = "italic", hjust = 0.5),
    axis.title.y = element_text(size = 10)
  ) +
  geom_text(
    data = summary_fig2 %>%
      group_by(variable) %>%
      summarize(y_pos = max(mean + se, na.rm = TRUE) + 3, .groups = "drop") %>%
      left_join(pval_fig2, by = "variable"),
    aes(x = variable, y = y_pos, label = label_text),
    inherit.aes = FALSE,
    size = 3,
    vjust = 0
  )

# Display figure in RStudio
Figure2

# Export Figure 2 as PNG (500 x 400 px)
ggsave(
  filename = "/Users/pablodelomas/Desktop/Figure2.png",
  plot = Figure2,
  width = 500 / 72,
  height = 400 / 72,
  dpi = 72
)

message("Figure exported to: /Users/pablodelomas/Desktop/Figure2.png")




# ============================================================
# 9. FIGURE A1: STUDY 2 OUTCOMES BY INCENTIVE SCHEME
#    AND GAME ORDER
# ============================================================

# We restrict the sample to Study 2 participants.
data_figA1 <- df %>%
  filter(study2 == 1)

# We compute means and standard errors for the four main outcomes
# by incentive treatment and game order.
summary_figA1 <- data_figA1 %>%
  group_by(incentive, game1first) %>%
  summarize(
    p_mean     = mean(p, na.rm = TRUE),
    payd_mean  = mean(payd, na.rm = TRUE),
    r_mean     = mean(r, na.rm = TRUE),
    riskd_mean = mean(riskd, na.rm = TRUE),
    p_se       = sd(p, na.rm = TRUE) / sqrt(sum(!is.na(p))),
    payd_se    = sd(payd, na.rm = TRUE) / sqrt(sum(!is.na(payd))),
    r_se       = sd(r, na.rm = TRUE) / sqrt(sum(!is.na(r))),
    riskd_se   = sd(riskd, na.rm = TRUE) / sqrt(sum(!is.na(riskd))),
    .groups = "drop"
  ) %>%
  pivot_longer(
    cols = c(p_mean, payd_mean, r_mean, riskd_mean,
             p_se, payd_se, r_se, riskd_se),
    names_to = c("variable", ".value"),
    names_pattern = "(p|payd|r|riskd)_(mean|se)"
  )

# Rename variables for display in the figure.
summary_figA1$variable <- recode(
  summary_figA1$variable,
  "p"     = "P",
  "payd"  = "PayD",
  "r"     = "R",
  "riskd" = "RiskD"
)

summary_figA1$variable <- factor(
  summary_figA1$variable,
  levels = c("P", "PayD", "R", "RiskD")
)

# Convert means and standard errors into percentages.
summary_figA1 <- summary_figA1 %>%
  mutate(
    mean = mean * 100,
    se   = se * 100
  )

# Create labels for game order to ensure the facet order is correct.
summary_figA1$game1first_label <- factor(
  summary_figA1$game1first,
  levels = c(1, 0),
  labels = c("Order: Game 1, Game 2", "Order: Game 2, Game 1")
)

# ------------------------------------------------------------
# P-values within each game order block
# ------------------------------------------------------------

# This function computes the p-value for the difference in proportions
# between incentive schemes within each game order condition.
get_pval_by_order <- function(data, g1, var_name) {
  data_sub <- data %>%
    filter(game1first == g1, !is.na(.data[[var_name]]), !is.na(incentive))
  
  if (nrow(data_sub) == 0) return(NA_real_)
  
  tbl <- table(data_sub[[var_name]], data_sub$incentive)
  
  if (all(dim(tbl) == c(2, 2))) {
    prop.test(tbl)$p.value
  } else {
    NA_real_
  }
}

# Build the p-value table by variable and game order.
pval_figA1 <- tidyr::crossing(
  variable_raw = c("p", "payd", "r", "riskd"),
  game1first   = c(1, 0)
) %>%
  rowwise() %>%
  mutate(
    pval = get_pval_by_order(data_figA1, game1first, variable_raw)
  ) %>%
  ungroup() %>%
  mutate(
    variable = recode(
      variable_raw,
      "p"     = "P",
      "payd"  = "PayD",
      "r"     = "R",
      "riskd" = "RiskD"
    ),
    game1first_label = factor(
      game1first,
      levels = c(1, 0),
      labels = c("Order: Game 1, Game 2", "Order: Game 2, Game 1")
    ),
    signif_label = case_when(
      is.na(pval) ~ "",
      pval < 0.01 ~ "***",
      pval < 0.05 ~ "**",
      pval < 0.10 ~ "*",
      TRUE ~ ""
    ),
    label_text = ifelse(
      !is.na(pval),
      paste0(signif_label, " p = ", formatC(pval, format = "f", digits = 3)),
      ""
    )
  ) %>%
  select(variable, game1first_label, pval, signif_label, label_text)

# ------------------------------------------------------------
# Reference thresholds by outcome
# ------------------------------------------------------------

thr_figA1 <- tibble(
  variable = factor(
    c("P", "PayD", "R", "RiskD"),
    levels = c("P", "PayD", "R", "RiskD")
  ),
  y = c(50, 12.5, 50, 12.5)
)

# ------------------------------------------------------------
# Build figure
# ------------------------------------------------------------

FigureA1 <- ggplot(
  summary_figA1,
  aes(x = variable, y = mean, fill = factor(incentive, levels = c(1, 0)))
) +
  geom_bar(
    stat = "identity",
    position = position_dodge(width = 0.7),
    width = 0.6,
    colour = "black"
  ) +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se),
    position = position_dodge(width = 0.7),
    width = 0.2
  ) +
  geom_segment(
    data = thr_figA1,
    aes(
      x    = as.numeric(variable) - 0.35,
      xend = as.numeric(variable) + 0.35,
      y    = y,
      yend = y
    ),
    inherit.aes = FALSE,
    colour = "red",
    linetype = "dashed",
    linewidth = 1
  ) +
  scale_fill_manual(
    values = c("1" = "white", "0" = "grey70"),
    labels = c("1" = "M", "0" = "H"),
    name = NULL
  ) +
  facet_grid(. ~ game1first_label) +
  scale_y_continuous(
    labels = scales::percent_format(scale = 1),
    limits = c(0, 100),
    breaks = seq(0, 100, by = 10)
  ) +
  labs(
    x = "",
    y = "Percentage of subjects"
  ) +
  theme_bw() +
  theme(
    text = element_text(size = 12),
    legend.position = c(0.98, 0.98),
    legend.justification = c("right", "top"),
    legend.background = element_blank(),
    legend.box.background = element_blank(),
    legend.key = element_rect(fill = "white", colour = "black", linewidth = 0.5),
    legend.key.height = unit(0.4, "lines"),
    legend.spacing.y = unit(0.1, "lines"),
    legend.text = element_text(face = "italic"),
    legend.title = element_blank(),
    plot.title = element_text(size = 12, face = "italic"),
    plot.subtitle = element_text(size = 10, face = "italic", hjust = 0.5),
    axis.title.y = element_text(size = 10)
  ) +
  geom_text(
    data = summary_figA1 %>%
      group_by(variable, game1first_label) %>%
      summarize(y_pos = max(mean + se, na.rm = TRUE) + 3, .groups = "drop") %>%
      left_join(pval_figA1, by = c("variable", "game1first_label")),
    aes(x = variable, y = y_pos, label = label_text),
    inherit.aes = FALSE,
    size = 3,
    vjust = 0
  )

# Display figure in RStudio
FigureA1

# Export Figure A1 as PNG (500 x 400 px)
ggsave(
  filename = "/Users/pablodelomas/Desktop/FigureA1.png",
  plot = FigureA1,
  width = 500 / 72,
  height = 400 / 72,
  dpi = 72
)

message("Figure exported to: /Users/pablodelomas/Desktop/FigureA1.png")